TODO

  • Site summary data

  • Biweekly summary of variables

    • Figures plus events timeline
    • Correlation & Heatmap
    • Add case counts / fix figure
  • Case Date Imputation - rule-based guesses

  • +3d from symptoms

  • +2d from test

  • -7d from end of isolation

  • Model for cases ~ .

  • Test high vs low traffic as FE in RE model

  • Finish written methods and results

  • Garbage data in, garbage inferences out… recurring problem for every project

Case Data…

Multiple imputation (mice) has a key assumption that data is MAR. But data is not MAR (arguably), it’s missing because of some dynamics not present in the data, like how much an admin cared to record cases when there were many of them. Using mice means making the analysis much more complicated (painfully so)…

Instead I used a simpler ad-hoc method: - if positive-test-result date is not available, then - use -5 days from end-of-isolation date, or if not available, - use the test date as is, or if not available, - use +3 days from symptoms-began date, or if not available,

It’s not really the most sound approach, but the data is questionable to begin with. It’s clear that the university gave up on collecting this data.


Analysis code
# Load ------------------------------------------------------
library(tidyverse)
library(here)
library(ggiraph)
library(patchwork)
ggplot2::theme_set(theme_bw())
source(here('R/report_plots.R'))

# Functions --------------------------------------------------

# creates yyyy-ww label for grouping data
get_date_week <- function(x){
  y <- lubridate::year(x)
  w <- lubridate::week(x) |> str_pad(2, 'left', 0)
  str_glue("{y}-{w}")
}

# get biweekly bin date for vector of dates
get_date_biweekly <- function(x){
  day_of_week <-  lubridate::wday(x)
  case_when(
    day_of_week %in% 1:3 ~ x + 3 - day_of_week,
    TRUE ~ x + 5 - day_of_week,
  )
}

# dates 2021-2022 with biweekly groups
biweekly_date_sequence <- function(){
  tibble(
    date = seq(
      as_date('2021-01-01'), 
      as_date('2022-12-31'), 
      by = 1)
  ) |> 
    mutate(biweek = get_date_biweekly(date))
}

# lookup table for date
week_midpoint_date_lookup <- function(start = '2021-01-01', end = '2023-01-01'){
  ends = lubridate::as_date(c(start, end))
  tibble(
    date = seq(ends[1], ends[2], by = 1),
    date_week = lubridate::week(date) |> str_pad(2, 'left', '0'),
    date_year = lubridate::year(date),
    week = str_glue("{date_year}-{date_week}"),
  ) |> 
    group_by(week) |> 
    summarise(date = mean(date))
}

binom_ci <- function(x, n){
  Hmisc::binconf(x, n, method = 'wilson', alpha = 0.05) |> 
    as_tibble() |> 
    janitor::clean_names()
}

get_binom_ci <- function(data){
  data |> 
    summarise(
      x = sum(pcr_positive == 'Positive'),
      n = n(),
      binconf = map2(x, n, ~binom_ci(.x, .y))
    ) |> 
    unnest(binconf) |> 
    mutate(across(c(point_est, lower, upper), ~(.x*100) |> round(1)))
}


# Plot Elements ----------------------------------------------

theme_color <- 'cornflowerblue'

# layout x-axis
scale_x_study_dates <- function(){
    scale_x_date(
      date_breaks = 'month',
      date_labels = month.name[c(9:12, 1:5)] |> strtrim(3),
      limits = c(
        min(swabs_tidy$date_swab), 
        max(swabs_tidy$date_swab)
      ))
}
# get rid of x-axis for vertically stacked plots
remove_x_labels <- function(){
  theme(axis.ticks.x = element_blank(),
        axis.text.x = element_blank())
}
# no minor grid
remove_grid <- function(){
  theme(panel.grid.minor = element_blank(),
        plot.subtitle = element_markdown(size = 8, hjust = 0), 
        plot.margin = unit(c(0.1, 1, 2, 0), unit = 'mm'),
        plot.title.position = 'panel')
}



# Data -------------------------------------------------------

# swab data
swabs <- 
  read_rds(here('data/cube.rds')) |> 
  filter(str_detect(site, '^UO_')) 

# lookup table for waste water site names
lookup_ww <- tribble(
  ~site_abbr, ~site,
  'UO_AA',  'Annex Residence',
  'UO_FA',  'Faculty of Social Sciences',
  'UO_FT',  'Friel Residence',
  'UO_NA',  'Northern sampling site',
  'UO_RGN', 'Roger Guindon Hall',
  'UO_ST',  'Southern sampling site',
  'UO_TBT', 'Tabaret Hall'
)

# UOttawa waste water data from R. Delatolla
uottawa_ww <-
  readxl::read_excel(here::here('data/ww_university.xlsx')) |>
  janitor::clean_names() |> 
  mutate(sample_date = as_date(sample_date)) |> 
  filter(
    sample_date > '2021-09-15', 
    sample_date < '2022-05-05',
    !is.na(viral_copies_per_copies_pep_avg),
    site != 'UO_RGN'
    ) |> 
  left_join(lookup_ww, by = c('site' = 'site_abbr')) |> 
  mutate(
    source =
      source_name |>
      str_remove('^Data - uOttawa - ') |>
      str_remove('.xlsx$')
  ) |> 
  transmute(site,
            sample_date, 
            signal = viral_copies_per_copies_pep_avg)

# ottawa wastewater data: daily means
regional_ww <- 
  read_rds(here('data/ww_ottawa.rds')) |> 
  select(sample_date, starts_with('cov_')) |> 
  pivot_longer(contains('cov_')) |> 
  mutate(target = str_extract(name, 'cov_n.'),
         stat = str_extract(name, 'mean|sd'),
         ) |> 
  select(-name) |> 
  pivot_wider(names_from = stat, values_from = value) |> 
  mutate(.lower = mean - sd, .upper = mean + sd)   

# data from university of ottawa
wifi <- 
  read_rds(here('data/uo_wifi.rds')) |> 
  filter(date >= min(swabs$date_swab), 
         date <= max(swabs$date_swab))

# important dates for context
event_dates <- 
  tribble(
    ~event, ~start, ~end,
    'Reading Week',  '2021-10-24', '2021-10-30',
    'Holiday Break', '2021-12-22', '2022-01-04',
    'Closure',       '2022-01-04', '2022-01-31',
    'Closure',       '2022-02-16', '2022-02-21',
    'Reading Week',  '2022-02-20', '2022-02-26',
  ) |> 
  mutate(across(start:end, as_date))

# UOttawa cases data
cases <-
  read_rds(here('data/cases_rule_imputed.rds')) |> 
  select(case, role, case_date, UC:TBT) |>
  mutate(biweek = get_date_biweekly(case_date)) |> 
  nest(associated_sites = UC:TBT) |> 
  print()
## # A tibble: 216 × 5
##     case role     case_date  biweek     associated_sites
##    <dbl> <fct>    <date>     <date>     <list>          
##  1     1 Employee 2020-07-20 2020-07-21 <tibble [1 × 6]>
##  2     2 Employee 2020-08-04 2020-08-04 <tibble [1 × 6]>
##  3     3 Employee 2020-09-04 2020-09-03 <tibble [1 × 6]>
##  4     4 Student  2020-09-06 2020-09-08 <tibble [1 × 6]>
##  5     5 Student  2020-09-08 2020-09-08 <tibble [1 × 6]>
##  6     7 Employee 2020-09-10 2020-09-10 <tibble [1 × 6]>
##  7     8 Employee 2020-09-06 2020-09-08 <tibble [1 × 6]>
##  8     9 Student  2020-09-11 2020-09-10 <tibble [1 × 6]>
##  9    10 Student  2020-09-11 2020-09-10 <tibble [1 × 6]>
## 10    11 Employee 2020-09-14 2020-09-15 <tibble [1 × 6]>
## # … with 206 more rows
# Summaries --------------------------------------------------

swabs_tidy <- swabs |> 
  filter(!negative_control, swab_type != 'sponge') |> 
  select(date_swab, site, floor, location, pcr_positive, pcr_ct,
         co2) |> 
  mutate(biweek = get_date_biweekly(date_swab))

positivity_summary <- function(swabs){
  swabs |> 
    summarise(
      n = n(),
      x = sum(pcr_positive == 'Positive', na.rm = F),
      positivity = round(100 * x / n, digits = 1),
      .groups = 'drop')
}

co2_summary <- function(swabs){
  swabs |> 
    summarise(
      n = n(),
      co2_vals = list(co2),
      co2_mean = mean(co2, na.rm = T),
      .groups = 'drop')
}
  
# overall summary
swab_summary <- 
  swabs_tidy |> positivity_summary()

# site summary
swab_summary_sites <- swabs_tidy |> 
  group_by(site) |> 
  positivity_summary()
  
# high-low traffic summary
swab_summary_location_traffic <- swabs_tidy |> 
  mutate(traffic = if_else(
    str_detect(location, 'Site 1'), 'high traffic', 'low traffic'
  )) |> 
  group_by(traffic) |> 
  positivity_summary()



## Figure 1: data aggregation ----

# uottawa cases - biweekly counts 
cases_biweekly <- 
  cases |> 
  select(case, biweek, role) |> 
  group_by(biweek) |> 
  summarise(cases = n(),
            cases_student = sum(role == 'Student'),
            cases_staff = sum(role == 'Employee'),
            ) |> 
  right_join(
    biweekly_date_sequence() |> 
      filter(biweek < '2022-02-03') |> 
      distinct(biweek),
    by = 'biweek'
  ) |> 
  mutate(across(where(is.integer), ~if_else(is.na(.), 0L, .))) 

# swabs biweekly summary
swabs_biweekly <- 
  swabs_tidy |>
  group_by(biweek) |> 
  positivity_summary() |> 
  left_join(swabs_tidy |> group_by(biweek) |> co2_summary())

# swabs biweekly summary by site  
swabs_biweekly_by_site <- 
  swabs_tidy |>
  group_by(site, biweek) |> 
  positivity_summary() |> 
  left_join(swabs_tidy |> group_by(site, biweek) |> co2_summary())

# UOttawa ww biweekly means
uottawa_ww_biweekly <- 
  uottawa_ww |> 
  mutate(biweek = get_date_biweekly(sample_date)) |> 
  group_by(biweek) |> 
  summarise(signal = mean(signal),
            n = n())

# daily ww data for study period
regional_ww_daily <- 
  regional_ww |> 
  filter(
    sample_date >= min(swabs_tidy$date_swab) - 4,
    sample_date <= max(swabs_tidy$date_swab) + 4,
  ) |> 
  group_by(sample_date) |> 
  summarise(mean = mean(mean))

# regional ww biweekly means
regional_ww_biweekly <- 
  regional_ww_daily |> 
  mutate(biweek = get_date_biweekly(sample_date)) |> 
  group_by(biweek) |> 
  summarise(mean = mean(mean))

# wifi site agg
wifi_extended <- read_rds(here('data/uo_wifi.rds')) |> 
  filter(date >= min(swabs$date_swab) - 2, 
         date <= max(swabs$date_swab) + 2) |> 
  mutate(biweek = get_date_biweekly(date))

# overall wifi biweekly timeseries
wifi_biweekly_sites <- 
  wifi_extended |> 
  group_by(biweek) |> 
  summarise(
    n = n(),
    min = min(clients),
    mean = mean(clients),
    max = max(clients)
  )
# per building wifi biweekly timeseries
wifi_biweekly <- 
  wifi_biweek_sites <- 
  wifi_extended |> 
  group_by(biweek) |> 
  summarise(
    n = n(),
    min = min(clients),
    mean = mean(clients),
    max = max(clients)
  )



# Map --------------------------------------------------------

swab_coords <- tribble(
  ~site, ~name, ~lat, ~lng,
  'MRT', 'Morrisette Hall (MRT)', 45.423239101483546, -75.68428970961207,
  'MNT', 'Montpetit', 45.42254171025894, -75.68265871464943,
  '90U', '90 University', 45.42242555707402, -75.6850144991752,
  'DMS', 'Desmarais Bldg.', 45.42387679340988, -75.68727075448753,
  'TBT', 'Tabaret Hall', 45.42450940162542, -75.68631900184072,
  'LPR',  'Louis Pasteur Bldg.', 45.421375668614466, -75.68026389993058,
)

ww_coords <- tribble(
  ~site, ~name, ~lat, ~lng,
  'aa', 'Annex Residence', 45.42678129026445, -75.68034405960745,
  'fa', 'Faculty of Soc. Sci.', 45.421624722628515, -75.68390386012699,
  'ft', 'Friel Residence', 45.43043440080566, -75.68290766533741,
  'tbt', 'Tabaret Hall', 45.42450940162542, -75.68631900184072
  # 'na', 'North sampling Site', NA, NA,
  # 'st', 'South Sampling Site', NA, NA, # Nobody knows where this is
)

figure_sites_map <- 
  leaflet::leaflet(swab_coords, width = 600, height = 330) |> 
  leaflet::addProviderTiles(leaflet::providers$CartoDB.Positron) |> 
  leaflet::setView(lng = -75.6843, lat = 45.4235, zoom = 14) |> 
  leaflet::addCircleMarkers(~lng, ~lat, label =  ~name, popup = ~name, 
                            radius = 2, color = '#440099') |> 
  leaflet::addLabelOnlyMarkers(
    ~lng, ~lat, label =  ~site, 
    labelOptions = leaflet::labelOptions(
      noHide = T, direction = 'top', textOnly = T, 
    )) |> 
  leaflet::addCircleMarkers(~lng, ~lat, label =  ~site, 
                            radius = 2, color = '#440099') |> 
  leaflet::addCircleMarkers(
    data = ww_coords, ~lng, ~lat, label =  ~name, popup =  ~name, 
    radius = 4, color = '#f96999')


# Results ----------------------------------------------------

rs_data <- lst(
  swabs_collected = nrow(swabs_tidy),
  positivity_ovr = swab_summary$positivity,
  positivity_site_min = min(swab_summary_sites$positivity),
  positivity_site_max = max(swab_summary_sites$positivity),
)

rs_abstract <- rs_data |> 
  glue::glue_data(
    "Over the 32-week study period, we collected {swabs_collected} swabs at six university buildings, including two buildings where wastewater surveillance was currently ongoing. 
Overall, {positivity_ovr}% of swabs were PCR-positive for SARS-CoV-2, with individual buildings ranging between {positivity_site_min}% and {positivity_site_max}%.
*Comment on when spikes in cases, swabs, and waste-water occurred….* *Comment on prediction of cases using swabs, ww, or combined approach…*
"
  )

rs_results <- rs_data |> 
  glue::glue_data()
Expand for plotting details
# fig1 -----
fig1_timeline <- 
  event_dates |> 
  ggplot(aes(x = start, xend = end, y = event, yend = event)) +
  geom_segment(linewidth = 5, lineend = 'round',
               color = theme_color,
               alpha = 0.35) +
  geom_segment(linewidth = 1, lineend = 'butt',
             color = theme_color,
             alpha = 0.9) +
  geom_point(aes(x = end), color = theme_color, size = 2) +
  geom_point(color = theme_color, size = 2) +
  scale_x_study_dates() +
  labs(y = '', x = NULL, subtitle = 'Timeline') +
  remove_x_labels() +
  remove_grid()


fig1_cases <-
  cases_biweekly |>
  mutate(biweek = if_else(wday(biweek) == 5, biweek + 0.75, biweek - 0.75)) |>
  select(biweek, cases_student, cases_staff) |>
  pivot_longer(-biweek) |>
  mutate(name = str_remove(name, 'cases_') |> str_to_title()) |>
  ggplot() +
  geom_vline(
    data = tibble(x = as_date('2022-02-03')),
    aes(xintercept = x),
    lty = 2,
    color = 'gray'
  ) +
  geom_text(
    data = tibble(
      x = as_date('2022-02-05'),
      y = 18,
      label = 'End of data'
    ),
    aes(x, y, label = label),
    size = 2,
    hjust = 0
  ) +
  geom_col(
    aes(biweek, value, fill = name),
    size = 0.15,
    width = 3.5,
    color = 'gray80',
    position = position_stack()
  ) +
  labs(
    x = NULL,
    y = NULL,
    fill = NULL,
    subtitle = 'UOttawa COVID-19 Cases',
  ) +
  scale_fill_carto_d() +
  scale_x_study_dates() +
  remove_grid() +
  remove_x_labels() +
  theme(
    axis.title.y = ggtext::element_markdown(lineheight = 1),
    legend.position = c(0.9, 0.4),
    legend.key.size = unit(2, 'mm'),
    legend.background = element_rect(color = 'grey80')
  )

fig1_swabs <- 
  swabs_biweekly |> 
  ggplot(aes(biweek, positivity)) +
  geom_smooth(
    size = 0.5,
    alpha = 0.2, 
    fill = theme_color, 
    span = 0.4,
    method = 'loess',
    formula = 'y ~ x', 
    na.rm = T) +
  geom_point(color = theme_color, alpha = 0.85, shape = 1, size = 1) +
  scale_x_study_dates() +
  labs(x = NULL, y = NULL,
       subtitle = 'Swab Positivity (%)') +
  remove_x_labels() +
  remove_grid()
  
fig1_co2 <-
  ggplot(swabs_biweekly, aes(biweek, co2_mean)) +
  geom_point(color = theme_color, shape = 1, alpha = 0.85, size = 1) +
  geom_smooth(fill = theme_color, alpha = 0.2, size = 0.5) +
  labs(x = NULL, y = NULL,
       subtitle = 'CO~2~ (ppm)') +
  scale_x_study_dates() +
  remove_x_labels() +
  remove_grid()
  
fig1_wifi <-
  wifi_biweekly |> 
  ggplot(aes(biweek, mean)) +
  geom_smooth(span = 0.5, 
              size = 0.5,
              alpha = 0.2, 
              fill = theme_color) +
  geom_point(color = theme_color,  alpha = 0.85, shape = 1, size = 1) +
  labs(x = NULL, y = NULL,
       subtitle = "Wi-Fi Connections") +
  scale_x_study_dates() +
  remove_x_labels() +
  remove_grid()

fig1_uo_ww <-
  ggplot(uottawa_ww_biweekly,
         aes(biweek, signal)) +
  geom_smooth(alpha = 0.2, size = 0.5, fill = theme_color, na.rm = T) +
  geom_point(aes(size = n),
             color = theme_color, alpha = 0.85,
             shape = 1,
             na.rm = T,
             show.legend = F) +
  labs(
    x = NULL, 
    y = NULL,
    subtitle = 'University Waste-Water',
  ) +
  scale_x_study_dates() +
  scale_size(range = c(1, 3)) +
  remove_x_labels() +
  remove_grid()

fig1_ott_ww <- 
  ggplot(regional_ww_daily) + 
  geom_smooth(aes(sample_date, mean), 
              na.rm = T,
              method = 'loess', formula = 'y ~ x',
              span = 0.2, size = 0.5, alpha = 0.2,
              fill = theme_color) +
  geom_point(data = regional_ww_biweekly,
             aes(biweek, mean),
             na.rm = T,
             alpha = 0.85, shape = 1, size = 1,
             color = theme_color) +
  labs(x = 'Date',
       y = NULL,
       subtitle = 'Regional Waste-Water',
       ) +
  scale_x_study_dates() +
  remove_grid() +
  theme(axis.title.y = ggtext::element_markdown(lineheight = 1)) 

figure_1A <- wrap_plots(
    fig1_timeline, fig1_cases, fig1_swabs, fig1_co2, 
    fig1_wifi, fig1_uo_ww, fig1_ott_ww
  ) & 
  plot_layout(ncol = 1, heights = c(1))

rm(fig1_timeline, fig1_swabs, fig1_co2, fig1_wifi, fig1_uo_ww, fig1_ott_ww, fig1_cases)

New

Abstract Results

Over the 32-week study period, we collected 566 swabs at six university buildings, including two buildings where wastewater surveillance was currently ongoing. Overall, 13.4% of swabs were PCR-positive for SARS-CoV-2, with individual buildings ranging between 4.7% and 34%. Comment on when spikes in cases, swabs, and waste-water occurred…. Comment on prediction of cases using swabs, ww, or combined approach…


Written Results


Summary Tables

Building Summary
Site N PCR-Positive %
Overall 566 76 13.4
UO_90U 100 34 34.0
UO_DMS 88 6 6.8
UO_MRT 92 13 14.1
UO_MNT 100 9 9.0
UO_TBT 86 4 4.7
UO_LPR 100 10 10.0

Positivity by sample location traffic-level
Traffic level N PCR-Positive %
high traffic 286 43 15.0
low traffic 280 33 11.8

Figure 1: Timeseries

Figure 1: Time-series of (top to bottom) important events at the university, proportion of PCR-positive swabs, biweekly mean ambient C02 (across collection sites), biweekly mean peak number of Wi-Fi connections (across 5 buildings), biweekly mean waste-water signal (across up to 6 collection sites; relative to PPMoV), biweekly mean regional waste-water detection (relative to PPMoV).

Spearman Correlation

term Swab PCR CO2 Wi-Fi University WW Ottawa WW Cases
CO2 -0.19 NA NA NA NA NA
Wi-Fi -0.27 0.11 NA NA NA NA
University WW 0.63 0.02 -0.21 NA NA NA
Ottawa WW 0.70 -0.23 -0.31 0.67 NA NA
Cases 0.74 -0.19 -0.36 0.72 0.5 NA


Map

Where are these collection sites?

Pink markers represent waste-water collection sites; purple markers represent buildings where surface testing was performed. Only one building, Tabaret Hall, had both surface and waste-water testing. The locations of two waste-water sampling sites could not be ascertained and are not displayed on the map.

It remains unclear which buildings the ‘South Sampling Site’ and ‘North Sampling Site’ catchment areas are related to (not mapped).


Waste Water - UO

I’ve removed the off-campus site (Robert Guidon Hall) and retained the other 6 near-campus sites (raw data below).


📊 Summary

This plot shows the counts of positive (red) and negative (yellow) samples collected at each facility over time (x-axis). Samples that could not be tested are shown in navy. Only flocked swabs are shown. (Other sponge swabs were collected on 2022-04-28 were for comparing flocked and sponge swabs.)


⡯⡷ Dotplot

This plot shows the counts of positive and negative samples collected at each facility over time.


Old analysis

Model

This section contains results from modeling SARS-CoV-2 cases at UO using swab-PCR results as a predictor.


Specification

We created a random intercepts logistic regression model with the occurrence of cases (binary) as outcome and swab results for the previous week (the proportion of positive swabs) as predictor. The model has a random intercept for each site.

Our model formula is cases ~ positives[lag 1 week] + (1|site).

The model is fit as follows:

swab_model <-
  blme::bglmer(
    cases_binary ~ detection_lag_1week + (1 | site),
    data =  uo_sites,
    family = binomial
  )


Model response time-series

These plots show the swab results, cases, and predictions by the current model.



Model summary

These tables show the model coefficients and statistics.

Model statistics
nobs sigma logLik AIC BIC deviance df.residual
84 1 -24.52 55.04 62.33 42.83 81

Model parameters
effect group term estimate std.error statistic p.value
fixed NA (Intercept) -3.301 0.804 -4.107 0.0000401
fixed NA detection_lag1w 7.114 2.112 3.369 0.0007549
ran_pars site sd__(Intercept) 1.067 NA NA NA



  Relation between UO cases (y/n) and proportion of positive swabs the previous week
Predictors Odds Ratios CI p
(Intercept) 0.04 0.01 – 0.18 <0.001
Swabs @ t-1week 1229.31 19.59 – 77128.28 0.001
Random Effects
σ2 3.29
τ00 site 1.14
ICC 0.26
N site 6
Observations 84
Marginal R2 / Conditional R2 0.378 / 0.538



Model response curves

This plot shows the modelling data as points (detection lagged 1 week and cases at a given site- y/n), as well as how the probability of future cases varies by the previous weeks detection level and site, according to our model (curves).



Random Intercepts for Sites

This plot shows the odds ratios for the intercepts of the model (an intercept for each site).



EDA

Swabs and Cases

This panel shows the cases counts at UO over the course of our sampling period. The case data shown represents the days on which a positive test was reported (black rug lines) and the presumed start of transmissiblity for each case (red lines).



Swabs, Wifi, and CO2

This panel shows linked data from swab results, CO2 readings, and wifi traffic (number of users @ peak, daily) during our study period. Unfortunately, we do not have wifi data for 90U.



Wifi Traffic

This panel shows a time-series of the daily peak number of wifi users at UO facilities. Sampling days are highlighted in blue.